/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    SubspaceCluster.java
 *    Copyright (C) 2001 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.datagenerators.clusterers;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Tag;
import weka.core.Utils;
import weka.datagenerators.ClusterDefinition;
import weka.datagenerators.ClusterGenerator;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> A data generator that produces data points in
 * hyperrectangular subspace clusters.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -h
 *  Prints this help.
 * </pre>
 * 
 * <pre>
 * -o &lt;file&gt;
 *  The name of the output file, otherwise the generated data is
 *  printed to stdout.
 * </pre>
 * 
 * <pre>
 * -r &lt;name&gt;
 *  The name of the relation.
 * </pre>
 * 
 * <pre>
 * -d
 *  Whether to print debug informations.
 * </pre>
 * 
 * <pre>
 * -S
 *  The seed for random function (default 1)
 * </pre>
 * 
 * <pre>
 * -a &lt;num&gt;
 *  The number of attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -c
 *  Class Flag, if set, the cluster is listed in extra attribute.
 * </pre>
 * 
 * <pre>
 * -b &lt;range&gt;
 *  The indices for boolean attributes.
 * </pre>
 * 
 * <pre>
 * -m &lt;range&gt;
 *  The indices for nominal attributes.
 * </pre>
 * 
 * <pre>
 * -P &lt;num&gt;
 *  The noise rate in percent (default 0.0).
 *  Can be between 0% and 30%. (Remark: The original 
 *  algorithm only allows noise up to 10%.)
 * </pre>
 * 
 * <pre>
 * -C &lt;cluster-definition&gt;
 *  A cluster definition of class 'SubspaceClusterDefinition'
 *  (definition needs to be quoted to be recognized as 
 *  a single argument).
 * </pre>
 * 
 * <pre>
 * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition:
 * </pre>
 * 
 * <pre>
 * -A &lt;range&gt;
 *  Generates randomly distributed instances in the cluster.
 * </pre>
 * 
 * <pre>
 * -U &lt;range&gt;
 *  Generates uniformly distributed instances in the cluster.
 * </pre>
 * 
 * <pre>
 * -G &lt;range&gt;
 *  Generates gaussian distributed instances in the cluster.
 * </pre>
 * 
 * <pre>
 * -D &lt;num&gt;,&lt;num&gt;
 *  The attribute min/max (-A and -U) or mean/stddev (-G) for
 *  the cluster.
 * </pre>
 * 
 * <pre>
 * -N &lt;num&gt;..&lt;num&gt;
 *  The range of number of instances per cluster (default 1..50).
 * </pre>
 * 
 * <pre>
 * -I
 *  Uses integer instead of continuous values (default continuous).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.5 $
 */
public class SubspaceCluster extends ClusterGenerator {

	/** for serialization */
	static final long serialVersionUID = -3454999858505621128L;

	/**
	 * noise rate in percent (option -P). The option help advertises a range
	 * of 0 to 30, but setNoiseRate stores whatever value it is given.
	 */
	protected double m_NoiseRate;

	/** the cluster definitions; lazily replaced by a default one in getClusters() */
	protected ClusterDefinition[] m_Clusters;

	/**
	 * per attribute: the number of values (used for nominal attributes when
	 * the data format is built); reset to 1 per attribute in setOptions
	 */
	protected int[] m_numValues;

	/** store global min values (not referenced by any method in this file) */
	protected double[] m_globalMinValue;

	/** store global max values (not referenced by any method in this file) */
	protected double[] m_globalMaxValue;

	/** cluster type: uniform/random */
	public static final int UNIFORM_RANDOM = 0;
	/** cluster type: total uniform */
	public static final int TOTAL_UNIFORM = 1;
	/** cluster type: gaussian */
	public static final int GAUSSIAN = 2;
	/** the tags for the cluster types */
	public static final Tag[] TAGS_CLUSTERTYPE = {
			new Tag(UNIFORM_RANDOM, "uniform/random"),
			new Tag(TOTAL_UNIFORM, "total uniform"),
			new Tag(GAUSSIAN, "gaussian") };

	/** cluster subtype: continuous */
	public static final int CONTINUOUS = 0;
	/** cluster subtype: integer */
	public static final int INTEGER = 1;
	/** the tags for the cluster subtypes */
	public static final Tag[] TAGS_CLUSTERSUBTYPE = {
			new Tag(CONTINUOUS, "continuous"), new Tag(INTEGER, "integer") };

	/**
	 * initializes the generator, sets the number of clusters to 0, since user
	 * has to specify them explicitly
	 */
	public SubspaceCluster() {
		super();

		setNoiseRate(defaultNoiseRate());
	}

	/**
	 * Describes this data generator in one sentence.
	 * 
	 * @return the description text, intended for display in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {
		final String description = "A data generator that produces data points in hyperrectangular subspace clusters.";
		return description;
	}

	/**
	 * Lists the options of the superclass followed by the noise rate option
	 * (-P), the cluster definition option (-C) and the options of a
	 * SubspaceClusterDefinition.
	 * 
	 * @return an enumeration of all the available options
	 */
	public Enumeration listOptions() {
		Vector options = enumToVector(super.listOptions());

		// -P: noise rate
		String noiseHelp = "\tThe noise rate in percent (default "
				+ defaultNoiseRate() + ").\n"
				+ "\tCan be between 0% and 30%. (Remark: The original \n"
				+ "\talgorithm only allows noise up to 10%.)";
		options.addElement(new Option(noiseHelp, "P", 1, "-P <num>"));

		// -C: cluster definition, referred to by its unqualified class name
		String definitionClass = SubspaceClusterDefinition.class.getName();
		String shortName = definitionClass.replaceAll(".*\\.", "");
		String definitionHelp = "\tA cluster definition of class '" + shortName
				+ "'\n"
				+ "\t(definition needs to be quoted to be recognized as \n"
				+ "\ta single argument).";
		options.addElement(new Option(definitionHelp, "C", 1,
				"-C <cluster-definition>"));

		// heading entry, then the options understood by a single definition
		options.addElement(new Option("", "", 0, "\nOptions specific to "
				+ definitionClass + ":"));
		Enumeration definitionOptions = new SubspaceClusterDefinition(this)
				.listOptions();
		options.addAll(enumToVector(definitionOptions));

		return options.elements();
	}

	/**
	 * Parses a list of options for this object.
	 * <p/>
	 * 
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 * 
	 * <pre>
	 * -h
	 *  Prints this help.
	 * </pre>
	 * 
	 * <pre>
	 * -o &lt;file&gt;
	 *  The name of the output file, otherwise the generated data is
	 *  printed to stdout.
	 * </pre>
	 * 
	 * <pre>
	 * -r &lt;name&gt;
	 *  The name of the relation.
	 * </pre>
	 * 
	 * <pre>
	 * -d
	 *  Whether to print debug informations.
	 * </pre>
	 * 
	 * <pre>
	 * -S
	 *  The seed for random function (default 1)
	 * </pre>
	 * 
	 * <pre>
	 * -a &lt;num&gt;
	 *  The number of attributes (default 1).
	 * </pre>
	 * 
	 * <pre>
	 * -c
	 *  Class Flag, if set, the cluster is listed in extra attribute.
	 * </pre>
	 * 
	 * <pre>
	 * -b &lt;range&gt;
	 *  The indices for boolean attributes.
	 * </pre>
	 * 
	 * <pre>
	 * -m &lt;range&gt;
	 *  The indices for nominal attributes.
	 * </pre>
	 * 
	 * <pre>
	 * -P &lt;num&gt;
	 *  The noise rate in percent (default 0.0).
	 *  Can be between 0% and 30%. (Remark: The original 
	 *  algorithm only allows noise up to 10%.)
	 * </pre>
	 * 
	 * <pre>
	 * -C &lt;cluster-definition&gt;
	 *  A cluster definition of class 'SubspaceClusterDefinition'
	 *  (definition needs to be quoted to be recognized as 
	 *  a single argument).
	 * </pre>
	 * 
	 * <pre>
	 * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition:
	 * </pre>
	 * 
	 * <pre>
	 * -A &lt;range&gt;
	 *  Generates randomly distributed instances in the cluster.
	 * </pre>
	 * 
	 * <pre>
	 * -U &lt;range&gt;
	 *  Generates uniformly distributed instances in the cluster.
	 * </pre>
	 * 
	 * <pre>
	 * -G &lt;range&gt;
	 *  Generates gaussian distributed instances in the cluster.
	 * </pre>
	 * 
	 * <pre>
	 * -D &lt;num&gt;,&lt;num&gt;
	 *  The attribute min/max (-A and -U) or mean/stddev (-G) for
	 *  the cluster.
	 * </pre>
	 * 
	 * <pre>
	 * -N &lt;num&gt;..&lt;num&gt;
	 *  The range of number of instances per cluster (default 1..50).
	 * </pre>
	 * 
	 * <pre>
	 * -I
	 *  Uses integer instead of continuous values (default continuous).
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {
		super.setOptions(options);

		// every attribute starts with one value; a cluster definition
		// might change this (only relevant for nominal data)
		final int numAtts = getNumAttributes();
		m_numValues = new int[numAtts];
		for (int att = 0; att < numAtts; att++)
			m_numValues[att] = 1;

		// noise rate: fall back to the default if -P is absent
		String noiseStr = Utils.getOption('P', options);
		if (noiseStr.length() == 0)
			setNoiseRate(defaultNoiseRate());
		else
			setNoiseRate(Double.parseDouble(noiseStr));

		// consume every -C option, one cluster definition each
		Vector definitions = new Vector();
		String definitionStr = Utils.getOption('C', options);
		while (definitionStr.length() != 0) {
			SubspaceClusterDefinition definition = new SubspaceClusterDefinition(
					this);
			definition.setOptions(Utils.splitOptions(definitionStr));
			definitions.add(definition);
			definitionStr = Utils.getOption('C', options);
		}

		ClusterDefinition[] asArray = new ClusterDefinition[definitions.size()];
		definitions.toArray(asArray);
		m_Clusters = asArray;

		// in case no cluster definition was provided, getClusters() installs
		// a default one
		getClusters();
	}

	/**
	 * Assembles the current settings: the options of the superclass, the
	 * noise rate (-P) and one -C option per cluster definition.
	 * 
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String[] getOptions() {
		Vector collected = new Vector();

		String[] inherited = super.getOptions();
		for (int k = 0; k < inherited.length; k++)
			collected.add(inherited[k]);

		collected.add("-P");
		collected.add("" + getNoiseRate());

		// each definition is serialized into a single quoted argument
		ClusterDefinition[] clusters = getClusters();
		for (int c = 0; c < clusters.length; c++) {
			collected.add("-C");
			collected.add(Utils.joinOptions(clusters[c].getOptions()));
		}

		String[] asArray = new String[collected.size()];
		collected.toArray(asArray);
		return asArray;
	}

	/**
	 * Returns the cluster definitions. If none are set (null or empty array)
	 * a single default SubspaceClusterDefinition is installed first; a note is
	 * printed to stdout only when the array existed but was empty.
	 * 
	 * @return the current cluster definitions, never empty
	 */
	protected ClusterDefinition[] getClusters() {
		final boolean unset = (m_Clusters == null);
		if (!unset && m_Clusters.length != 0)
			return m_Clusters;

		if (!unset)
			System.out.println("NOTE: at least 1 cluster definition is necessary, created default one.");

		m_Clusters = new ClusterDefinition[] { new SubspaceClusterDefinition(
				this) };
		return m_Clusters;
	}

	/**
	 * Supplies the number of attributes used when none is specified.
	 * 
	 * @return the default number of attributes, which is 1
	 */
	protected int defaultNumAttributes() {
		final int numAttributes = 1;
		return numAttributes;
	}

	/**
	 * Sets the number of attributes of the dataset and allocates a fresh
	 * (zero-filled) array for the per-attribute number of values.
	 * 
	 * @param numAttributes
	 *            the new number of attributes
	 */
	public void setNumAttributes(int numAttributes) {
		super.setNumAttributes(numAttributes);

		// size taken from the getter, i.e. from what the superclass stored
		final int size = getNumAttributes();
		m_numValues = new int[size];
	}

	/**
	 * Tip text for the numAttributes property.
	 * 
	 * @return the tip text, intended for display in the
	 *         explorer/experimenter gui
	 */
	public String numAttributesTipText() {
		return "The number of attributes the generated data will contain "
				+ "(Note: they must be covered by the cluster definitions!)";
	}

	/**
	 * Supplies the noise rate used when none is specified.
	 * 
	 * @return the default noise rate, which is 0.0
	 */
	protected double defaultNoiseRate() {
		final double noiseRate = 0.0;
		return noiseRate;
	}

	/**
	 * Returns the noise rate (in percent) that is currently stored.
	 * 
	 * @return the stored noise rate
	 */
	public double getNoiseRate() {
		return this.m_NoiseRate;
	}

	/**
	 * Stores the noise rate (in percent). The value is taken as is; no range
	 * check is performed here.
	 * 
	 * @param newNoiseRate
	 *            the noise rate to store
	 */
	public void setNoiseRate(double newNoiseRate) {
		this.m_NoiseRate = newNoiseRate;
	}

	/**
	 * Tip text for the noiseRate property.
	 * 
	 * @return the tip text, intended for display in the
	 *         explorer/experimenter gui
	 */
	public String noiseRateTipText() {
		final String tip = "The noise rate to use.";
		return tip;
	}

	/**
	 * Public accessor for the cluster definitions; delegates to getClusters(),
	 * which installs a default definition if none is set.
	 * 
	 * @return the cluster definitions in use
	 */
	public ClusterDefinition[] getClusterDefinitions() {
		final ClusterDefinition[] definitions = getClusters();
		return definitions;
	}

	/**
	 * Sets the cluster definitions to use. All elements must be instances of
	 * SubspaceClusterDefinition; the array is checked completely before any
	 * state is changed, so a rejected array leaves the generator as it was.
	 * Accepted definitions get this generator as parent and are re-initialized
	 * from their own options. A null or empty array results in a single
	 * default definition (see getClusters()).
	 * 
	 * @param value
	 *            the cluster definitions to use
	 * @throws Exception
	 *             if at least one element is not a SubspaceClusterDefinition
	 *             (the message lists the 1-based positions), or if
	 *             re-initializing a definition fails
	 */
	public void setClusterDefinitions(ClusterDefinition[] value)
			throws Exception {

		// validate first: other methods of this class cast every element to
		// SubspaceClusterDefinition, so a wrong element must never be stored
		if (value != null) {
			StringBuffer wrongIndices = new StringBuffer();
			for (int i = 0; i < value.length; i++) {
				// instanceof is false for null elements, so they are
				// reported as well instead of causing a NullPointerException
				if (!(value[i] instanceof SubspaceClusterDefinition)) {
					if (wrongIndices.length() != 0)
						wrongIndices.append(",");
					wrongIndices.append(i + 1);
				}
			}

			if (wrongIndices.length() != 0)
				throw new Exception("These cluster definitions are not '"
						+ SubspaceClusterDefinition.class.getName() + "': "
						+ wrongIndices);
		}

		m_Clusters = value;

		// getClusters() substitutes a default definition for null/empty input
		ClusterDefinition[] clusters = getClusters();
		for (int i = 0; i < clusters.length; i++) {
			clusters[i].setParent(this);
			// feeding the options back in initializes the definition
			clusters[i].setOptions(clusters[i].getOptions());
		}
	}

	/**
	 * Tip text for the clusterDefinitions property.
	 * 
	 * @return the tip text, intended for display in the
	 *         explorer/experimenter gui
	 */
	public String clusterDefinitionsTipText() {
		final String tip = "The clusters to use.";
		return tip;
	}

	/**
	 * Verifies that every attribute lies in the attribute range of at least
	 * one cluster definition.
	 * 
	 * @return always true; lack of coverage is reported via exception
	 * @throws IllegalArgumentException
	 *             if one or more attributes are not covered; the message
	 *             lists their 1-based indices
	 */
	protected boolean checkCoverage() {
		final int numAtts = getNumAttributes();

		// hits[a] = number of cluster definitions whose range contains a
		int[] hits = new int[numAtts];
		for (int att = 0; att < numAtts; att++) {
			for (int c = 0; c < getClusters().length; c++) {
				SubspaceClusterDefinition definition = (SubspaceClusterDefinition) getClusters()[c];
				Range covered = new Range(definition.getAttrIndexRange());
				covered.setUpper(getNumAttributes());
				if (covered.isInRange(att))
					hits[att]++;
			}
		}

		// collect the 1-based indices of all attributes without a hit
		StringBuffer uncovered = new StringBuffer();
		for (int att = 0; att < hits.length; att++) {
			if (hits[att] != 0)
				continue;
			if (uncovered.length() != 0)
				uncovered.append(",");
			uncovered.append(att + 1);
		}

		if (uncovered.length() == 0)
			return true;

		throw new IllegalArgumentException(
				"The following attributes are not covered by a cluster definition: "
						+ uncovered + "\n");
	}

	/**
	 * Reports whether examples can be generated one at a time. This generator
	 * always answers false; its generateExample() throws an exception.
	 * 
	 * @return false
	 */
	public boolean getSingleModeFlag() {
		final boolean singleMode = false;
		return singleMode;
	}

	/**
	 * Builds the header of the dataset to generate and prepares the cluster
	 * definitions for generation. Boolean attributes are named "B" + index,
	 * nominal ones "N" + index and numeric ones "X" + index; with the class
	 * flag set, a nominal attribute "class" with one label per cluster
	 * ("c0", "c1", ...) is appended and made the class attribute.
	 * 
	 * @return the output data format (an empty dataset)
	 * @throws Exception
	 *             if the options cannot be re-applied; an uncovered attribute
	 *             causes an IllegalArgumentException from checkCoverage()
	 */
	public Instances defineDataFormat() throws Exception {

		// re-apply the current options to (re-)initialize dependent state
		setOptions(getOptions());

		checkCoverage();

		Random rng = new Random(getSeed());
		setRandom(rng);

		final boolean withClass = getClassFlag();
		FastVector atts = new FastVector(3);

		// the two labels shared by all boolean attributes
		FastVector booleanLabels = new FastVector(2);
		booleanLabels.addElement("false");
		booleanLabels.addElement("true");

		for (int idx = 0; idx < getNumAttributes(); idx++) {
			Attribute att;
			if (m_booleanCols.isInRange(idx)) {
				att = new Attribute("B" + idx, booleanLabels);
			} else if (m_nominalCols.isInRange(idx)) {
				// nominal: as many labels as recorded for this attribute
				final int labelCount = m_numValues[idx];
				FastVector labels = new FastVector(labelCount);
				for (int v = 0; v < labelCount; v++)
					labels.addElement("value-" + v);
				att = new Attribute("N" + idx, labels);
			} else {
				att = new Attribute("X" + idx);
			}
			atts.addElement(att);
		}

		if (withClass) {
			// one class label per cluster
			final int numClusters = getClusters().length;
			FastVector classLabels = new FastVector(numClusters);
			for (int c = 0; c < numClusters; c++)
				classLabels.addElement("c" + c);
			atts.addElement(new Attribute("class", classLabels));
		}

		Instances dataset = new Instances(getRelationNameToUse(), atts, 0);
		if (withClass)
			dataset.setClassIndex(m_NumAttributes);

		// the generator keeps its own empty copy of the header
		setDatasetFormat(new Instances(dataset, 0));

		// let every cluster determine its number of instances
		ClusterDefinition[] clusters = getClusters();
		for (int c = 0; c < clusters.length; c++) {
			SubspaceClusterDefinition definition = (SubspaceClusterDefinition) clusters[c];
			definition.setNumInstances(rng);
			definition.setParent(this);
		}

		return dataset;
	}

	/**
	 * Tests whether the given attribute index lies in the range of boolean
	 * columns.
	 * 
	 * @param index
	 *            the index of the attribute
	 * @return true if the index is in the boolean column range
	 */
	public boolean isBoolean(int index) {
		final boolean inBooleanRange = m_booleanCols.isInRange(index);
		return inBooleanRange;
	}

	/**
	 * Tests whether the given attribute index lies in the range of nominal
	 * columns.
	 * 
	 * @param index
	 *            the index of the attribute
	 * @return true if the index is in the nominal column range
	 */
	public boolean isNominal(int index) {
		final boolean inNominalRange = m_nominalCols.isInRange(index);
		return inNominalRange;
	}

	/**
	 * Returns the array holding the number of values per attribute. The
	 * internal array itself is returned, not a copy.
	 * 
	 * @return the array that stores the number of values for a nominal
	 *         attribute
	 */
	public int[] getNumValues() {
		return this.m_numValues;
	}

	/**
	 * Not supported by this generator: examples can only be generated all at
	 * once via generateExamples().
	 * 
	 * @return never returns normally
	 * @throws Exception
	 *             always
	 */
	public Instance generateExample() throws Exception {
		final String message = "Examples cannot be generated one by one.";
		throw new Exception(message);
	}

	/**
	 * Generates all examples of the dataset, one cluster after the other.
	 * The class label of cluster number n is "c" + n. How the instances of a
	 * cluster are produced depends on its cluster type (uniform/random,
	 * total uniform, gaussian).
	 * 
	 * @return the dataset obtained from getDatasetFormat(), filled with the
	 *         generated instances
	 * @throws Exception
	 *             if the dataset format is not defined
	 */
	public Instances generateExamples() throws Exception {
		Instances data = getDatasetFormat();
		if (data == null)
			throw new Exception("Dataset format not defined.");

		ClusterDefinition[] clusters = getClusters();
		for (int c = 0; c < clusters.length; c++) {
			SubspaceClusterDefinition definition = (SubspaceClusterDefinition) clusters[c];
			final int count = definition.getNumInstances();
			final String label = "c" + c;
			final int type = definition.getClusterType().getSelectedTag()
					.getID();

			if (type == UNIFORM_RANDOM) {
				// instances are drawn one at a time
				for (int k = 0; k < count; k++) {
					Instance inst = generateExample(data, getRandom(),
							definition, label);
					if (inst != null)
						data.add(inst);
				}
			} else if (type == TOTAL_UNIFORM) {
				if (definition.isInteger())
					generateUniformIntegerExamples(data, count, definition,
							label);
				else
					generateUniformExamples(data, count, definition, label);
			} else if (type == GAUSSIAN) {
				generateGaussianExamples(data, count, getRandom(), definition,
						label);
			}
		}

		return data;
	}

	/**
	 * Generates one random instance for a cluster. Attributes that do not
	 * belong to the cluster are set to missing. For boolean/nominal attributes
	 * a random integer offset is added to the minimum (or the minimum is used
	 * directly if min equals max); for numeric attributes a value is drawn
	 * between min and max and rounded if the cluster is an integer cluster.
	 * 
	 * @param format
	 *            the dataset format
	 * @param randomG
	 *            the random number generator to use
	 * @param cl
	 *            the cluster definition
	 * @param cName
	 *            the class value
	 * @return the generated instance
	 */
	private Instance generateExample(Instances format, Random randomG,
			SubspaceClusterDefinition cl, String cName) {

		final boolean roundToInteger = cl.isInteger();
		final boolean withClass = getClassFlag();

		Instance example = new Instance(withClass ? m_NumAttributes + 1
				: m_NumAttributes);
		example.setDataset(format);

		boolean[] inCluster = cl.getAttributes();
		double[] lower = cl.getMinValue();
		double[] upper = cl.getMaxValue();

		// pos = position of the attribute within the cluster's min/max arrays
		int pos = -1;
		for (int att = 0; att < m_NumAttributes; att++) {
			if (!inCluster[att]) {
				example.setMissing(att);
				continue;
			}

			pos++;
			final double lo = lower[pos];
			final double hi = upper[pos];
			double value;
			if (isBoolean(att) || isNominal(att)) {
				if (lo == hi) {
					value = lo;
				} else {
					// pick one of the (hi - lo + 1) values
					int choices = (int) (hi - lo + 1.0);
					value = randomG.nextInt(choices);
					value += lo;
				}
			} else {
				value = randomG.nextDouble() * (hi - lo) + lo;
				if (roundToInteger)
					value = Math.round(value);
			}
			example.setValue(att, value);
		}

		if (withClass)
			example.setClassValue(cName);

		return example;
	}

	/**
	 * Generates the examples of a "total uniform" cluster with continuous
	 * values: instance j (0-based) gets, for every attribute of the cluster,
	 * the value min + (max - min) * j / (numInstances - 1), i.e. the
	 * instances are spread evenly from min to max. Attributes that do not
	 * belong to the cluster are set to missing. A single instance is placed
	 * at the minimum.
	 * 
	 * @param format
	 *            the dataset format; the instances are added to it
	 * @param numInstances
	 *            the number of instances to generate
	 * @param cl
	 *            the cluster definition
	 * @param cName
	 *            the class value
	 */
	private void generateUniformExamples(Instances format, int numInstances,
			SubspaceClusterDefinition cl, String cName) {

		int numAtts = m_NumAttributes;
		if (getClassFlag())
			numAtts++;

		// one work instance is reused for all additions
		Instance example = new Instance(numAtts);
		example.setDataset(format);
		boolean[] attributes = cl.getAttributes();
		double[] minValue = cl.getMinValue();
		double[] maxValue = cl.getMaxValue();
		double[] diff = new double[minValue.length];

		for (int i = 0; i < minValue.length; i++)
			diff[i] = (maxValue[i] - minValue[i]);

		// With one instance there are no intervals: j / (numInstances - 1)
		// would be 0/0 = NaN and every covered attribute would end up as NaN
		// (Weka's missing value). Use one interval instead, which puts the
		// single instance at the minimum.
		final double intervals = (numInstances > 1) ? (double) (numInstances - 1)
				: 1.0;

		for (int j = 0; j < numInstances; j++) {
			final double fraction = (double) j / intervals;
			// num = position within the cluster's min/max arrays
			int num = -1;
			for (int i = 0; i < m_NumAttributes; i++) {
				if (attributes[i]) {
					num++;
					example.setValue(i, minValue[num] + (diff[num] * fraction));
				} else {
					example.setMissing(i);
				}
			}
			if (getClassFlag())
				example.setClassValue(cName);
			format.add(example);
		}
	}

	/**
	 * Generates the examples of a "total uniform" cluster with integer
	 * values. The integer grid spanned by ceil(min)..floor(max) of every
	 * cluster attribute is walked combination by combination (the first
	 * cluster attribute changes fastest). Each visited combination is added
	 * numInstances / numCombinations times; the remainder is distributed as
	 * one extra copy to each of the first combinations. Attributes that do
	 * not belong to the cluster are set to missing.
	 * 
	 * @param format
	 *            the dataset format; the instances are added to it
	 * @param numInstances
	 *            the number of instances to generate
	 * @param cl
	 *            the cluster definition
	 * @param cName
	 *            the class value
	 * @throws IllegalArgumentException
	 *             if the min/max interval of an attribute contains no integer
	 */
	private void generateUniformIntegerExamples(Instances format,
			int numInstances, SubspaceClusterDefinition cl, String cName) {

		if (numInstances <= 0)
			return;

		int numAtts = m_NumAttributes;
		if (getClassFlag())
			numAtts++;

		Instance example = new Instance(numAtts);
		example.setDataset(format);
		boolean[] attributes = cl.getAttributes();
		double[] minValue = cl.getMinValue();
		double[] maxValue = cl.getMaxValue();

		// all arrays below are indexed by the position within the cluster
		// (like minValue/maxValue), NOT by the attribute index
		final int dims = minValue.length;
		int[] minInt = new int[dims];
		int[] maxInt = new int[dims];
		int[] intValue = new int[dims];
		int[] attIndex = new int[dims]; // cluster position -> attribute index

		long numCombinations = 1;
		for (int d = 0; d < dims; d++) {
			minInt[d] = (int) Math.ceil(minValue[d]);
			maxInt[d] = (int) Math.floor(maxValue[d]);
			long numInt = (long) maxInt[d] - (long) minInt[d] + 1;
			if (numInt <= 0)
				throw new IllegalArgumentException(
						"No integer value between " + minValue[d] + " and "
								+ maxValue[d] + " in cluster " + cName);
			// Once there are more combinations than instances the exact
			// count is irrelevant; stop multiplying to avoid overflow.
			if (numCombinations <= numInstances)
				numCombinations *= numInt;
		}
		int numEach = (int) (numInstances / numCombinations);
		int rest = (int) (numInstances % numCombinations);

		// initialize with the smallest values combination
		// NOTE(review): assumes the number of set flags in 'attributes'
		// equals minValue.length, as the other generate methods do
		int pos = 0;
		for (int i = 0; i < m_NumAttributes; i++) {
			if (attributes[i]) {
				attIndex[pos] = i;
				intValue[pos] = minInt[pos];
				example.setValue(i, (double) minInt[pos]);
				pos++;
			} else {
				example.setMissing(i);
			}
		}
		if (getClassFlag())
			example.setClassValue(cName);

		int added = 0;
		while (added < numInstances) {
			// add all copies for the current value combination
			int copies = numEach;
			if (rest > 0) {
				copies++;
				rest--;
			}
			for (int k = 0; k < copies; k++) {
				format.add(example);
				added++;
			}
			if (added >= numInstances)
				break;

			// continue on a copy so added instances are never touched again
			example = (Instance) example.copy();

			// switch to the next value combination: reset every position
			// that is at its maximum and carry on to the next one
			int d = 0;
			while (d < dims && intValue[d] >= maxInt[d]) {
				intValue[d] = minInt[d];
				example.setValue(attIndex[d], (double) minInt[d]);
				d++;
			}
			if (d == dims)
				break; // every combination has been visited
			intValue[d]++;
			example.setValue(attIndex[d], (double) intValue[d]);
		}
	}

	/**
	 * Generates the examples of a gaussian cluster: every attribute of the
	 * cluster gets mean + nextGaussian() * stddev, rounded if the cluster is
	 * an integer cluster. Attributes that do not belong to the cluster are
	 * set to missing.
	 * 
	 * @param format
	 *            the dataset format; the instances are added to it
	 * @param numInstances
	 *            the number of instances to generate
	 * @param random
	 *            the random number generator
	 * @param cl
	 *            the cluster definition
	 * @param cName
	 *            the class value
	 */
	private void generateGaussianExamples(Instances format, int numInstances,
			Random random, SubspaceClusterDefinition cl, String cName) {

		final boolean roundToInteger = cl.isInteger();

		// one work instance is reused for all additions
		Instance example = new Instance(getClassFlag() ? m_NumAttributes + 1
				: m_NumAttributes);
		example.setDataset(format);

		boolean[] inCluster = cl.getAttributes();
		double[] means = cl.getMeanValue();
		double[] stddevs = cl.getStddevValue();

		for (int n = 0; n < numInstances; n++) {
			// pos = position within the cluster's mean/stddev arrays
			int pos = -1;
			for (int att = 0; att < m_NumAttributes; att++) {
				if (!inCluster[att]) {
					example.setMissing(att);
					continue;
				}
				pos++;
				double value = means[pos]
						+ (random.nextGaussian() * stddevs[pos]);
				if (roundToInteger)
					value = Math.round(value);
				example.setValue(att, value);
			}
			if (getClassFlag())
				example.setClassValue(cName);
			format.add(example);
		}
	}

	/**
	 * Supplies the documentation to output after the generation process.
	 * This generator has nothing to add at that point.
	 * 
	 * @return an empty string
	 * @throws Exception
	 *             declared by the signature; not thrown by this
	 *             implementation
	 */
	public String generateFinished() throws Exception {
		final String nothing = "";
		return nothing;
	}

	/**
	 * Compiles documentation about the data generation before the generation
	 * process: for every cluster its type (plus " / INTEGER" for integer
	 * clusters), its attributes, the configured and the actually chosen
	 * number of instances, followed by the total number of instances and
	 * clusters. Every line starts with "%".
	 * 
	 * @return string with additional information
	 */
	public String generateStart() {
		StringBuffer docu = new StringBuffer();

		ClusterDefinition[] clusters = getClusters();
		int sumInst = 0;
		for (int cNum = 0; cNum < clusters.length; cNum++) {
			SubspaceClusterDefinition cl = (SubspaceClusterDefinition) clusters[cNum];
			docu.append("%\n");
			docu.append("% Cluster: c" + cNum + "   ");
			switch (cl.getClusterType().getSelectedTag().getID()) {
			case UNIFORM_RANDOM:
				docu.append("Uniform Random");
				break;
			case TOTAL_UNIFORM:
				// was labelled "Total Random", which contradicts the
				// cluster type's own tag text ("total uniform")
				docu.append("Total Uniform");
				break;
			case GAUSSIAN:
				docu.append("Gaussian");
				break;
			}
			if (cl.isInteger()) {
				docu.append(" / INTEGER");
			}

			docu.append("\n% ----------------------------------------------\n");
			docu.append("%" + cl.attributesToString());

			docu.append("\n% Number of Instances:            "
					+ cl.getInstNums() + "\n");
			docu.append("% Generated Number of Instances:  "
					+ cl.getNumInstances() + "\n");
			sumInst += cl.getNumInstances();
		}
		docu.append("%\n% ----------------------------------------------\n");
		docu.append("% Total Number of Instances: " + sumInst + "\n");
		docu.append("%                            in " + clusters.length
				+ " Cluster(s)\n%");

		return docu.toString();
	}

	/**
	 * Returns the revision of this class, extracted from the revision keyword
	 * string by RevisionUtils.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		final String revision = RevisionUtils.extract("$Revision: 1.5 $");
		return revision;
	}

	/**
	 * Command line entry point: runs a new SubspaceCluster generator with the
	 * given arguments.
	 * 
	 * @param args
	 *            the arguments for the data generator
	 */
	public static void main(String[] args) {
		SubspaceCluster generator = new SubspaceCluster();
		runDataGenerator(generator, args);
	}
}
